from cProfile import label
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import bar_chart_race as bcr
from matplotlib import rc
# Set matplotlib's "animation.html" rc parameter to 'jshtml'.
rc('animation', html='jshtml')

# Yearly publication counts, one column per country, indexed by "Year".
# A forward slash is used as the separator: it resolves on Windows and POSIX
# alike, and avoids the invalid "\p" escape sequence a backslash would create.
df = pd.read_csv("data/publications_per_year.csv", index_col="Year")
df.head()
| Afghanistan | Albania | Algeria | Angola | Argentina | Armenia | Australia | Azerbaijan | Bahrain | Bangladesh | ... | United Arab Emirates | United Kingdom | United States | Uruguay | Uzbekistan | Venezuela | Viet Nam | Yemen | Zambia | Zimbabwe | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Year | |||||||||||||||||||||
| 1980 | 0.0 | 1.0 | 0.0 | 0.0 | 19.500000 | 0.0 | 498.716667 | 1.0 | 1.500 | 0.000000 | ... | 12.45 | 1708.472342 | 10746.88088 | 0.0 | 0.0 | 7.166667 | 0.0 | 0.0 | 2.0 | 1.000000 |
| 1981 | 0.0 | 0.0 | 0.0 | 0.5 | 19.500000 | 2.0 | 499.537121 | 0.0 | 3.000 | 2.300000 | ... | 22.25 | 1697.864286 | 11740.56502 | 0.0 | 0.0 | 7.250000 | 0.2 | 0.0 | 4.0 | 1.500000 |
| 1982 | 0.0 | 0.0 | 1.0 | 1.0 | 13.066667 | 1.0 | 542.394444 | 1.0 | 7.375 | 1.000000 | ... | 20.25 | 1955.949242 | 12438.06136 | 1.0 | 1.0 | 11.900000 | 0.0 | 0.0 | 0.0 | 4.000000 |
| 1983 | 0.0 | 0.0 | 1.0 | 1.0 | 13.066667 | 1.0 | 542.394444 | 1.0 | 7.375 | 1.000000 | ... | 20.25 | 1955.949242 | 12438.06136 | 1.0 | 1.0 | 11.900000 | 0.0 | 0.0 | 0.0 | 4.000000 |
| 1984 | 0.0 | 0.0 | 0.0 | 2.0 | 25.883333 | 4.0 | 556.416667 | 0.0 | 3.000 | 2.611111 | ... | 21.50 | 2253.234475 | 14208.50701 | 0.0 | 0.0 | 11.166667 | 0.0 | 0.5 | 1.0 | 2.333333 |
5 rows × 131 columns
# Animated bar chart race built with the bar_chart_race package.
# `fixed_max` and `filename` are deliberately not passed, so the library's
# own defaults apply to both.
race_palette = [
    '#6ECBCE', '#FF2243', '#FFC33D', '#CE9673', '#FFA0FF',
    '#6501E5', '#F79522', '#699AF8', '#34718E', '#00DBCD',
    '#00A3FF', '#F8A737', '#56BD5B', '#D40CE5', '#6936F9',
    '#FF317B', '#0000F3', '#FFA0A0', '#31FF83', '#0556F3',
]
race_figure_options = {
    'figsize': (8, 3),
    'dpi': 120,
    'facecolor': '#F8FAFF',
}
bcr.bar_chart_race(
    df=df,                            # one row per time period
    fig_kwargs=race_figure_options,   # figure size, resolution, background
    orientation="h",                  # horizontal bars
    sort="desc",                      # bars sorted in descending order
    n_bars=10,                        # bars shown in each frame
    steps_per_period=45,              # animation smoothness
    period_length=1500,               # time in ms spent on each row
    colors=race_palette,
    title='Top 10 Countries with Maximum Number of Publications 1980-2021',
    bar_kwargs={'alpha': 0.9, 'lw': 0},   # bar opacity and outline width
    bar_texttemplate='{x:.0f}',       # bar label format
    period_template='{x:.0f}',        # period label format
)
import matplotlib.pyplot as plt
import plotly.express as px
# df2=pd.read_csv("AI publication time series by country.csv")
# df2.head()
# df2 = px.data.gapminder().query("Country=='India'")
# # fig = px.line(df2, x="Year", y="Value", title='Papers in India')
# # fig.show()
# Reload the yearly publication counts, this time keeping "Year" as a regular
# column so it can be used directly as the x-axis.
# A forward slash is used as the separator: it resolves on Windows and POSIX
# alike, and avoids the invalid "\p" escape sequence a backslash would create.
df2 = pd.read_csv("data/publications_per_year.csv")
df2.head()

# CSV column name -> legend label, in plotting order.
country_labels = {
    'United States': 'USA',
    'China': 'China',
    'India': 'India',
    'EU (27)': 'EU (27)',
    'United Kingdom': 'United Kingdom',
    'Brazil': 'Brazil',
}

years = df2['Year']
plt.figure(figsize=(15, 9))
# One line per selected country, all drawn with the same marker style.
for column, legend_label in country_labels.items():
    plt.plot(years, df2[column], marker='*', markersize=7, label=legend_label)
plt.xlabel('Years')
plt.ylabel('No of AI Research Publications')
plt.title('No of AI Research Publications vs Years')
plt.grid(True)
plt.legend()
plt.show()
# Yearly AI publication counts per institution.
# A forward slash is used as the separator: it resolves on Windows and POSIX
# alike, and avoids the invalid "\A" escape sequence a backslash would create.
df3 = pd.read_csv("data/AI research by institution.csv")
df3.head()
| Year | Chinese Academy of Sciences | Tsinghua University | Massachusetts Institute of Technology | Stanford University | Shanghai Jiao Tong University | University of Michigan | Harvard University | Max Planck Society | Zhejiang University | Carnegie Mellon University | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1980 | 1.000000 | 1.000000 | 204.833333 | 202.552020 | 0.000000 | 130.817100 | 128.492857 | 83.000000 | 0.000000 | 111.338095 |
| 1 | 1981 | 3.166667 | 5.083333 | 240.991667 | 221.825000 | 0.000000 | 148.477381 | 169.448413 | 90.566667 | 2.000000 | 134.969697 |
| 2 | 1982 | 2.833333 | 3.000000 | 253.833333 | 232.009957 | 3.500000 | 170.967857 | 154.605988 | 96.816667 | 0.333333 | 106.000000 |
| 3 | 1983 | 8.166667 | 7.000000 | 275.039827 | 219.934524 | 3.500000 | 171.625000 | 189.644877 | 110.693831 | 1.000000 | 171.009524 |
| 4 | 1984 | 5.500000 | 5.000000 | 270.991667 | 251.114286 | 2.833333 | 223.785714 | 172.583333 | 102.987989 | 0.500000 | 182.283333 |
# Line chart of yearly AI publication counts for six institutions from df3.
years = df3['Year']

# Columns to draw, in plotting order; each column name doubles as its legend label.
institutions = [
    'Chinese Academy of Sciences',
    'Tsinghua University',
    'Massachusetts Institute of Technology',
    'Stanford University',
    'Max Planck Society',
    'Harvard University',
]

plt.figure(figsize=(15, 9))
for institution in institutions:
    plt.plot(years, df3[institution], marker='*', markersize=7, label=institution)
plt.xlabel('Years')
plt.ylabel('No of AI Research Publications of Different Universities')
plt.title('No of AI Research Publications vs Years')
plt.grid(True)
plt.legend()
plt.show()
# Country / continent code list, downloaded as CSV from pkgstore.datahub.io.
COUNTRY_CODES_URL = 'https://pkgstore.datahub.io/JohnSnowLabs/country-and-continent-codes-list/country-and-continent-codes-list-csv_csv/data/b7876b7f496677669644f3d1069d3121/country-and-continent-codes-list-csv_csv.csv'
CountryCode = pd.read_csv(COUNTRY_CODES_URL, sep=',')
CountryCode
# Per-country, per-year table of AI publications, GDP per capita and total publications.
# A forward slash is used as the separator: it resolves on Windows and POSIX
# alike, and avoids the invalid "\G" escape sequence a backslash would create.
data = pd.read_csv('data/GDPvsPapers.csv')
data.head()
| Year | Country | no of AI Research Publication | GDP Per Capita | Total number of number of All Research Publication | |
|---|---|---|---|---|---|
| 0 | 1980 | AFG | 0.0 | 272.655510 | 0.0 |
| 1 | 1981 | AFG | 0.0 | 264.111197 | 0.0 |
| 2 | 1982 | AFG | 0.0 | 260.078596 | 0.0 |
| 3 | 1983 | AFG | 0.0 | 256.045995 | 0.0 |
| 4 | 1984 | AFG | 0.0 | 252.013395 | 1.0 |
# Attach a continent name to every (Year, Country) row of `data`.

# Keep only the two columns needed to map three-letter country codes to continents.
CountryCode = CountryCode[['Continent_Name', 'Three_Letter_Country_Code']]

# A left merge emits one output row per matching right-hand row, so the lookup
# must hold at most one row per code or rows of `data` get duplicated.
# NOTE(review): the code list may list some countries under more than one
# continent; the first listed continent is kept here -- confirm that choice.
# Rows without a code are dropped so missing keys can never match each other.
continent_lookup = (
    CountryCode
    .dropna(subset=['Three_Letter_Country_Code'])
    .drop_duplicates(subset='Three_Letter_Country_Code', keep='first')
)

df_final = pd.merge(
    data,
    continent_lookup,
    left_on='Country',
    right_on='Three_Letter_Country_Code',
    how='left',
    validate='many_to_one',  # fail loudly if the lookup is ever non-unique
)

# Drop the redundant join key by name rather than by column position.
df_final = df_final.drop(columns='Three_Letter_Country_Code')

# Rows whose code found no continent are labelled 'European Union'.
# NOTE(review): this labels every unmatched code, not only an EU aggregate.
df_final['Continent_Name'] = df_final['Continent_Name'].fillna('European Union')
df_final
| Year | Country | no of AI Research Publication | GDP Per Capita | Total number of number of All Research Publication | Continent_Name | |
|---|---|---|---|---|---|---|
| 0 | 1980 | AFG | 0.000000 | 272.655510 | 0.000000 | Asia |
| 1 | 1981 | AFG | 0.000000 | 264.111197 | 0.000000 | Asia |
| 2 | 1982 | AFG | 0.000000 | 260.078596 | 0.000000 | Asia |
| 3 | 1983 | AFG | 0.000000 | 256.045995 | 0.000000 | Asia |
| 4 | 1984 | AFG | 0.000000 | 252.013395 | 1.000000 | Asia |
| ... | ... | ... | ... | ... | ... | ... |
| 6757 | 2017 | ZWE | 143.498430 | 1335.665064 | 1081.028519 | Africa |
| 6758 | 2018 | ZWE | 140.172395 | 1352.162653 | 1088.801098 | Africa |
| 6759 | 2019 | ZWE | 146.150396 | 1156.154864 | 1161.211884 | Africa |
| 6760 | 2020 | ZWE | 176.921210 | 1128.210711 | 1392.711900 | Africa |
| 6761 | 2021 | ZWE | 165.467014 | 1128.210711 | 1354.236918 | Africa |
6762 rows × 6 columns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.io as pio
# Animated scatter: AI publications (x) against GDP per capita (y), one frame per year.
pio.renderers.default = 'notebook_connected'

fig = px.scatter(
    df_final,
    x='no of AI Research Publication',
    y='GDP Per Capita',
    animation_frame='Year',
    animation_group='Country',
    size='Total number of number of All Research Publication',
    color='Continent_Name',
    hover_name='Country',
    text='Country',
    size_max=45,
    range_x=[0, 150000],
    range_y=[0, 110000],
)

# Frame duration stored in the args of the first button of the first update menu.
fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 700

# NOTE(review): this font spec is assigned but not passed to any call in this block.
textfont = {
    "family": "sans serif",
    "size": 18,
    "color": "LightSeaGreen",
}

fig.update_layout(uniformtext_minsize=10, uniformtext_mode='hide')
fig.show(renderer='notebook_connected')
# Animated scatter of AI publications against GDP per capita (log x-axis),
# split into one facet column per continent.

# NOTE(review): df4 is not referenced by the code below.
df4 = px.data.gapminder()

fig = px.scatter(
    df_final,
    x='GDP Per Capita',
    y='no of AI Research Publication',
    animation_frame='Year',
    animation_group='Country',
    size='Total number of number of All Research Publication',
    color='Continent_Name',
    hover_name='Country',
    facet_col='Continent_Name',
    log_x=True,
    size_max=45,
    range_x=[10, 110000],
    range_y=[0, 150000],
)
fig.update_layout(
    title='No. of AI Publications vs Per Capita GDP, 1980-2021',
    yaxis=dict(
        title='No. of AI Publications',
        gridcolor='white',
        gridwidth=2,
    ),
)

# Shorten each annotation to the text after its last '='.
fig.for_each_annotation(lambda a: a.update(text=a.text.split('=')[-1]))

# Title every x-axis in one call instead of looping over a hard-coded number
# of facet columns, so the labels stay complete if the continent count changes.
fig.update_xaxes(title_text='GDP/Capita')
fig.show()